"""
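    Where the required data lives:
        1. in the value of an HTML element object
        2. in an attribute of an HTML element object (given by the attribute name)
    Example tag rule:
    {
        x_path: '//*[@id="tdi_69"]/div/div/div[1]/h2/a',
        is_attr: 'href'
    }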


"""
from news_2.src.main.HtmlXPathParser.lxml_xpath_parser import LxmlExtractNews

if __name__ == '__main__':
    # Manual smoke run: apply the mpapress extraction rules to a page stored
    # on local disk and print what each rule yields.

    # The title and the link are both taken from the same anchor element.
    headline_xpath = "//*[@id=\"cmz-lead\"]/div[1]/div[1]/article/div[2]/h3/a"
    date_xpath = "//*[@id=\"cmz-lead\"]/div[1]/div[1]/article/div[2]/div[1]/div[2]/a"

    # One rule per extracted field. NOTE(review): an empty "is_attr" presumably
    # selects the element's own value, while a non-empty one names the
    # attribute to read -- confirm against LxmlExtractNews.
    tag_rules = [
        {"key": "title", "x_path": headline_xpath, "is_attr": ""},
        {"key": "link", "x_path": headline_xpath, "is_attr": "href"},
        {"key": "date", "x_path": date_xpath, "is_attr": ""},
    ]

    site_conf = {
        "key": "mpapress",
        "url": "https://mpapress.com/",
        "html_tag": tag_rules,
        "encode": "utf-8",
        "decode": "utf-8",
        # Not read anywhere in this script; kept as part of the config record.
        "start": 1,
    }

    extractor = LxmlExtractNews(
        site_conf['url'],
        site_conf['encode'],
        site_conf['decode'],
    )
    # The page content comes from a file path on the developer's machine.
    extractor.reading_html_content("F:/pyProjects/wx-pc/news_2/src/resources/mpapress.txt")
    extractor.decode_html()
    extractor.build_html_tags(site_conf['html_tag'])
    extractor.process_html_tag_data()

    # Emit one "key:value" line per tag held by the extractor.
    for tag in extractor.tags:
        print(f"{tag.key}:{tag.get_data_value()}")
